In [1]:
# Required Packages
import pandas as pd
import numpy as np

# preprocessing
from sklearn.impute import SimpleImputer

# Visualisation libraries

## Text
from colorama import Fore, Back, Style
from IPython.display import Image, display, Markdown, Latex

## progress bar
import progressbar

## seaborn
import seaborn as sns
sns.set_context('paper', rc={'font.size':12,'axes.titlesize':14,'axes.labelsize':12})
sns.set_style('white')

## matplotlib
import matplotlib.pyplot as plt
from matplotlib.patches import Ellipse, Polygon
import matplotlib.gridspec as gridspec
import matplotlib.colors
from pylab import rcParams
plt.style.use('seaborn-whitegrid')
plt.rcParams['figure.figsize'] = 14, 8
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12
plt.rcParams['text.color'] = 'k'
%matplotlib inline

## plotly
from plotly.offline import init_notebook_mode, iplot 
import plotly.graph_objs as go
import plotly.offline as py
from plotly.subplots import make_subplots
import plotly.express as px
%config InlineBackend.figure_format = 'retina' 

import warnings
warnings.filterwarnings("ignore")

Starbucks Offer Personalizations

In this article, we investigate a set simulated dataset that mimics customer behavior on the Starbucks rewards mobile app. Starbucks tends to send out offers to users of the mobile app once every few days. These offers are exclusive, that is not all users receive the same offer. An offer can contain a discount for their products or sometimes BOGO (buy one get one free). These offers have a validity period before the offer expires. The article here is inspired by a towardsdatascience.com article.

Loading the Datasets

In [2]:
def Line(N): return N*'='
def Header(Inp, Length = 120):
    print(Back.BLACK + Fore.CYAN + Style.NORMAL + '%s' % Inp + Style.RESET_ALL
         + Fore.BLUE + Style.NORMAL + ' %s' % Line(Length- len(Inp) - 1) + Style.RESET_ALL)
def Bottom(Length = 120):
    print(Fore.BLUE + Style.NORMAL + '%s' % Line(Length) + Style.RESET_ALL)
    
# Portfolio Dataset
Header('Portfolio Dataset:')
Portfolio = pd.read_csv('StarBucks/Portfolio_Clean.csv')
display(Portfolio.head().style.hide_index())

# Profile Dataset
Header('Profile Dataset:')
Profile = pd.read_csv('StarBucks/Profile_Clean.csv')
display(Profile.head().style.hide_index())

# Transcript Dataset
Header('Transcript Dataset:')
Transcript = pd.read_csv('StarBucks/Transcript_Clean.csv')
display(Transcript.head().style.hide_index())
Bottom()

User_Data = pd.read_csv('StarBucks/User_Data.csv')
Data = pd.read_csv('StarBucks/Data.csv')
Portfolio Dataset: =====================================================================================================
Reward Difficulty Duration Offer_Type Offer_ID Email Mobile Social Web
10 10 7 BOGO ae264e3637204a6fb9bb56bc8210ddfd 1 1 1 0
10 10 5 BOGO 4d5c57ea9a6940dd891ad53e9dbe8da0 1 1 1 1
0 0 4 Informational 3f207df678b143eea3cee63160fa8bed 1 1 0 1
5 5 7 BOGO 9b98b8c7a33c4b65b9aebfe6a799e6d9 1 1 0 1
5 20 10 Discount 0b1e1539f2cc45b7b9fa7c272da2e1d7 1 0 0 1
Profile Dataset: =======================================================================================================
Gender Age ID Became_Member_On Income Member_Since_Year Member_Tenure
Other 55.000000 68be06ca386d4c31939f3a4f0e3dd783 2017-02-12 64000.000000 2017 23.000000
Female 55.000000 0610b486422d4921ae7d2bf64640c50b 2017-07-15 112000.000000 2017 18.000000
Other 55.000000 38fe809add3b4fcf9315a9694bb96ff5 2018-07-12 64000.000000 2018 6.000000
Female 75.000000 78afa995795e4d85b5d9ceeca43f5fef 2017-05-09 100000.000000 2017 20.000000
Other 55.000000 a03223e636434f42ac4c3df47e8bac43 2017-08-04 64000.000000 2017 17.000000
Transcript Dataset: ====================================================================================================
Person Event Value Time Amount Reward Offer_ID
78afa995795e4d85b5d9ceeca43f5fef Offer Received {'offer id': '9b98b8c7a33c4b65b9aebfe6a799e6d9'} 0 nan nan 9b98b8c7a33c4b65b9aebfe6a799e6d9
a03223e636434f42ac4c3df47e8bac43 Offer Received {'offer id': '0b1e1539f2cc45b7b9fa7c272da2e1d7'} 0 nan nan 0b1e1539f2cc45b7b9fa7c272da2e1d7
e2127556f4f64592b11af22de27a7932 Offer Received {'offer id': '2906b810c7d4411798c6938adc9daaa5'} 0 nan nan 2906b810c7d4411798c6938adc9daaa5
8ec6ce2a7e7949b1bf142def7d0e0586 Offer Received {'offer id': 'fafdcd668e3743c1bb461111dcafc2a4'} 0 nan nan fafdcd668e3743c1bb461111dcafc2a4
68617ca6246f4fbc85e91a2a49552598 Offer Received {'offer id': '4d5c57ea9a6940dd891ad53e9dbe8da0'} 0 nan nan 4d5c57ea9a6940dd891ad53e9dbe8da0
========================================================================================================================
Feature Description in the Datasets
Feature Description
Reward (int) Given reward for completing an offer
Channels (list of strings) Email, mobile app, social media, etc
Difficulty (int) Minimum spending requirement for completing an offer
Duration (int) Time that an offer is valid
Offer_Type (string) Type of offer
ID (string) Offer ID
Feature Description
Gender (str) Customers gender
Age (int) Customers age
ID (str) Customers ID
Became_Member_On (int) Date of membership
Income (float) Customer's income
Feature Description
Person (str) Customer ID
Event (str) Record description
time (int) Time in hours (since the beginning of the study)
Value - (dict of strings) Offer ID or transaction amount

Exploratory Data Analysis

In this section, a variety of plots are provided to a better understanding of the relationships between features.

Channel Distribution for Various Offers

In [3]:
Offer_Type_List = ['Email','Mobile','Social','Web']
Group = Portfolio.groupby('Offer_Type')[Offer_Type_List].sum()
display(Group)
Group = Group.reset_index().melt(id_vars= ['Offer_Type'], value_vars= Group.columns, var_name='Channels', value_name='Count')

Colors = ['LightBlue', 'DeepSkyBlue', 'CornFlowerBlue', 'RoyalBlue', 'MediumBlue', 'Navy']
fig = px.bar(Group, x='Offer_Type', y='Count', orientation='v',
             color = 'Channels', barmode='group', color_discrete_sequence= Colors, height= 400)
fig.update_traces(marker_line_color= Colors[-1], marker_line_width=0.8, opacity=1)
fig.update_layout(plot_bgcolor= 'white', width = 700)
fig['layout']['yaxis'].update(range=[0, 5])
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.update_layout(title={'text': 'Channel Distribution for Various Offers',
                         'x':0.5, 'y':0.91,
                         'xanchor': 'center', 'yanchor': 'top'},
                 xaxis_title='Offer Type')
fig.show()
Email Mobile Social Web
Offer_Type
BOGO 4 4 3 3
Discount 4 3 2 4
Informational 2 2 1 1

It seems that social media is the least efficient channel among all.

Age Distribution

In [4]:
fig = px.histogram(Profile, x = 'Age', nbins=10,  marginal= 'box', color_discrete_sequence= ['LimeGreen'],
                  hover_data=['Age'])
fig.update_layout(plot_bgcolor= 'white', width = 800)
fig.update_layout(title={'text': 'Age Distribution',
                         'x':0.5, 'y':0.92,
                         'xanchor': 'center', 'yanchor': 'top'},
                 yaxis_title='Frequency')
fig.update_traces(marker_line_color= 'DarkGreen', marker_line_width=1, opacity=1)
fig.add_trace(go.Scatter(x= Profile['Age'].median()* np.ones(6000), y= np.arange(0,6000),
                         name="Median",
                        line=dict(color='RoyalBlue', width=2, dash='dot')))
fig.add_trace(go.Scatter(x= Profile['Age'].mean()* np.ones(6000), y= np.arange(0,6000),
                         name="Mean",
                        line=dict(color='Red', width=2, dash='dot')))
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig['layout']['yaxis'].update(range=[0, 6e3])
fig.show()

Costumers are mainly from the age group 50-60.

Income Distribution

In [5]:
fig = px.histogram(Profile, x = 'Income', nbins=10,  marginal= 'box', color_discrete_sequence= ['LightSkyblue'],
                  hover_data=['Income'])
fig.update_layout(plot_bgcolor= 'white', width = 800)
fig.update_layout(title={'text': 'Income Distribution',
                         'x':0.5, 'y':0.92,
                         'xanchor': 'center', 'yanchor': 'top'},
                 yaxis_title='Frequency')
fig.update_traces(marker_line_color= 'Navy', marker_line_width=1, opacity=1)
fig.add_trace(go.Scatter(x= Profile['Income'].median()* np.ones(6000), y= np.arange(0,6000),
                         name="Median",
                        line=dict(color='Black', width=2, dash='dot')))
fig.add_trace(go.Scatter(x= Profile['Income'].mean()* np.ones(6000), y= np.arange(0,6000),
                         name="Mean",
                        line=dict(color='Red', width=2, dash='dot')))
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig['layout']['yaxis'].update(range=[0, 5e3])
fig.show()

Most of the customers' income is around 60K.

Gender Distribution

In [6]:
Temp = Profile.groupby(['Gender'])['Gender'].agg({'count'}).rename(columns = {'count':'Count'})
Temp['Percentage'] = np.round(100* Temp.values /Temp.sum().values, 2)
display(Temp)
Temp.reset_index(drop = False, inplace = True)

fig = make_subplots(rows=1, cols=2, specs=[[{"type": "xy"}, {'type':'domain'}]])
Colors =  ['LightPink','CornFlowerBlue','LightGreen']
# Left
fig.add_trace(go.Bar(x= Temp['Gender'].values, y= Temp['Count'].values, marker_color= Colors,
                     textposition='inside', showlegend = False), 1,1)

# Right

fig.add_trace(go.Pie(labels= Temp['Gender'].values,
                     values= Temp['Count'].values, textfont=dict(size=16),
                     marker=dict(colors = Colors, line=dict(color='black', width=1))), 1, 2)
# Updates
fig.update_layout(plot_bgcolor= 'white')
fig.update_yaxes(title_text='Average', range=[0, 9e3], row=1, col=1)
fig.update_traces(marker_line_color='black', marker_line_width=1, opacity=1)
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')

fig.show()
Count Percentage
Gender
Female 6129 36.05
Male 8484 49.91
Other 2387 14.04

The number of male customers is larger than the number of female customers.

Memberships

In [7]:
Temp = Profile.groupby(['Member_Since_Year'])['Member_Since_Year'].agg({'count'}).rename(columns = {'count':'Count'})
Temp['Percentage'] = np.round(100* Temp.values /Temp.sum().values, 2)
display(Temp)
Temp.reset_index(drop = False, inplace = True)

fig = make_subplots(rows=1, cols=2, specs=[[{"type": "xy"}, {'type':'domain'}]])
Colors =  ['HoneyDew','GreenYellow', 'Bisque','LightSalmon','Plum', 'LightSkyBlue']
# Left
fig.add_trace(go.Bar(x= Temp['Member_Since_Year'].values, y= Temp['Count'].values, marker_color= Colors,
                     textposition='inside', showlegend = False), 1,1)

# Right

fig.add_trace(go.Pie(labels= Temp['Member_Since_Year'].values,
                     values= Temp['Count'].values, textfont=dict(size=16),
                     marker=dict(colors = Colors, line=dict(color='black', width=1))), 1, 2)
# Updates
fig.update_layout(plot_bgcolor= 'white')
fig.update_layout(xaxis = dict(tickmode = 'array', tickvals = np.arange(2013, 2019)))

fig.update_yaxes(title_text='Average', range=[0, 7e3], row=1, col=1)
fig.update_traces(marker_line_color='black', marker_line_width=1, opacity=1)
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')

fig.show()
Count Percentage
Member_Since_Year
2013 286 1.68
2014 691 4.06
2015 1830 10.76
2016 3526 20.74
2017 6469 38.05
2018 4198 24.69

Membership Growth Over Year by Gender

In [8]:
Temp = Profile.groupby(['Member_Since_Year','Gender'])['Gender'].agg({'count'}).rename(columns ={'count':'Count'})
Temp['Percentage'] = np.round(100* Temp.values /Temp.sum().values, 2)
display(Temp)
Temp.reset_index(drop = False, inplace = True)
Temp['Member_Since_Year'] = Temp['Member_Since_Year'].astype(str)
Temp.columns = [x.replace('_',' ') for x in Temp.columns]
Colors =  ['LightPink','CornFlowerBlue','LightGreen']

fig = px.bar(Temp, y= 'Member Since Year', x= 'Percentage', orientation='h',
             color = 'Gender', text = 'Percentage', color_discrete_sequence= Colors, hover_data = Temp.columns, height= 450)
fig.update_traces(marker_line_color=  'Navy', marker_line_width=1, opacity=1)
fig.update_traces(texttemplate='%{text:.2}%', textposition='inside')
fig.update_layout(uniformtext_minsize= 8, uniformtext_mode='hide')
fig['layout']['xaxis'].update(range=[0, 40])
fig.update_layout(plot_bgcolor= 'white')
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_layout(title={'text': 'Membership Growth Over Year by Gender',
                         'x':0.46, 'y':0.92,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.show()
Count Percentage
Member_Since_Year Gender
2013 Female 74 0.44
Male 197 1.16
Other 15 0.09
2014 Female 138 0.81
Male 520 3.06
Other 33 0.19
2015 Female 663 3.90
Male 905 5.32
Other 262 1.54
2016 Female 1509 8.88
Male 1483 8.72
Other 534 3.14
2017 Female 2443 14.37
Male 3067 18.04
Other 959 5.64
2018 Female 1302 7.66
Male 2312 13.60
Other 584 3.44

Assuming the data is a snapshot of the end of 2018. get member tenure by the number of months.

In [9]:
Temp = Profile[['Member_Tenure','Gender']].dropna()
Temp.columns = [x.replace('_',' ') for x in Temp.columns]

Colors =  ['LightPink','CornFlowerBlue','LightGreen']
fig = px.histogram(Temp, x = 'Member Tenure', color='Gender', nbins=100,  marginal= 'box',  color_discrete_sequence= Colors,
                  hover_data= Temp.columns)
fig.update_layout(plot_bgcolor= 'white')
fig.update_layout(title={'text': 'Member Tenure Distribution by Gender',
                         'x':0.5, 'y':0.92,
                         'xanchor': 'center', 'yanchor': 'top'},
                 yaxis_title='Frequency')
fig.update_traces(marker_line_color= 'Navy', marker_line_width=1, opacity=1)
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig['layout']['yaxis'].update(range=[Temp, 1600])
fig['layout']['xaxis'].update(range=[Temp['Member Tenure'].min()-1, Temp['Member Tenure'].max()+1])
fig.show()

Event Plots

In [10]:
Summary_Transcript = pd.DataFrame(Transcript.groupby(['Event'])['Event'].count())
Summary_Transcript.columns = ['Count']
display(Summary_Transcript.T)
data = {'Completed': [100*Summary_Transcript.loc['Offer Completed','Count']/Summary_Transcript.loc['Offer Viewed','Count']],
        'Viewed':[100*Summary_Transcript.loc['Offer Viewed','Count']/Summary_Transcript.loc['Offer Received','Count']]}
Group = pd.DataFrame(data = data).round(2).T
Group.columns = ['Percentage']
display(Group.T)

fig = plt.figure(figsize=(13, 5), constrained_layout=True)
gs = fig.add_gridspec(1, 5)
ax0 = fig.add_subplot(gs[:-1])
ax1 = fig.add_subplot(gs[-1])
_ = sns.barplot(ax = ax0, y="Event", x="Count", palette='Purples',
                edgecolor='k',  hatch="///", data=Summary_Transcript.reset_index(drop = False))
_ = ax0.set_xlim([0, 14e4])
_ = ax0.set_xlabel('Count', fontsize = 14)

_ = sns.barplot(ax = ax1, x="index", y="Percentage", palette='Blues',
                edgecolor='k',  hatch="///", data=Group.reset_index(drop = False))
_ = ax1.set_xlabel(None, fontsize = 14)
_ = ax1.set_ylim([0,100])
_ = ax1.set_yticks(np.arange(0, 101, 10))

# plt.subplots_adjust(wspace=-0.2)
Event Offer Completed Offer Received Offer Viewed Transaction
Count 33579 76277 57725 138953
Completed Viewed
Percentage 58.17 75.68
In [11]:
Temp = Data.groupby(['Event','Offer_Type'])['Offer_Type'].agg({'count'}).rename(columns ={'count':'Count'})
Temp['Percentage'] = np.round(100* Temp.values /Temp.sum().values, 2)
display(Temp)
Temp.reset_index(drop = False, inplace = True)
Temp.columns = [x.replace('_',' ') for x in Temp.columns]
Colors =   ['GreenYellow','LightSalmon','LightSkyBlue']

fig = px.bar(Temp, y= 'Offer Type', x= 'Percentage', orientation='h',
             color = 'Event', text = 'Percentage', color_discrete_sequence= Colors, hover_data = Temp.columns, height= 350)
fig.update_traces(marker_line_color=  'Navy', marker_line_width=1, opacity=1)
fig.update_traces(texttemplate='%{text:.2}%', textposition='inside')
fig.update_layout(uniformtext_minsize= 8, uniformtext_mode='hide')
fig['layout']['xaxis'].update(range=[0, 50])
fig.update_layout(plot_bgcolor= 'white')
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_layout(title={'text': 'Event by Offer Type',
                         'x':0.46, 'y':0.92,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.show()
Count Percentage
Event Offer_Type
Offer Completed BOGO 15669 9.35
Discount 17910 10.69
Offer Received BOGO 30499 18.20
Discount 30543 18.23
Informational 15235 9.09
Offer Viewed BOGO 25449 15.19
Discount 21445 12.80
Informational 10831 6.46

A Customer's Journey

First off,

In [12]:
Data['Event'].unique().tolist()
Out[12]:
['Offer Received', 'Offer Viewed', 'Transaction', 'Offer Completed']

Now, let's focus on those customers that completed an offer.

In [13]:
# creating a list of these customers
offer_completed_list = Data.loc[Data['Event'] == 'Offer Completed', 'Person'].tolist()

The journey of one of these customers can be analyzed as well. For example, the first customer from the above list

In [14]:
Customer_Journey = Data.loc[Data.Person == offer_completed_list[0],:].sort_index()
Customer_Journey.head(6).style.hide_index()
Out[14]:
Person Event Time Amount Reward_Received Offer_ID Reward_Defined Difficulty Duration Offer_Type Email Mobile Social Web
9fa9ae8f57894cc9a3b8a9bbe0fc1b2f Offer Received 0 nan nan 2906b810c7d4411798c6938adc9daaa5 2.000000 10.000000 7.000000 Discount 1.000000 1.000000 0.000000 1.000000
9fa9ae8f57894cc9a3b8a9bbe0fc1b2f Offer Viewed 0 nan nan 2906b810c7d4411798c6938adc9daaa5 2.000000 10.000000 7.000000 Discount 1.000000 1.000000 0.000000 1.000000
9fa9ae8f57894cc9a3b8a9bbe0fc1b2f Transaction 0 34.560000 nan nan nan nan nan nan nan nan nan nan
9fa9ae8f57894cc9a3b8a9bbe0fc1b2f Offer Completed 0 nan 2.000000 2906b810c7d4411798c6938adc9daaa5 2.000000 10.000000 7.000000 Discount 1.000000 1.000000 0.000000 1.000000
9fa9ae8f57894cc9a3b8a9bbe0fc1b2f Transaction 42 21.550000 nan nan nan nan nan nan nan nan nan nan
9fa9ae8f57894cc9a3b8a9bbe0fc1b2f Transaction 114 32.870000 nan nan nan nan nan nan nan nan nan nan

Feature Difference by Offer Combination

To have plots for this part. We need to calulate transactions amount without any offers and The number of transaction without any offers.

In [15]:
# Transactions amount without any offers
User_Data['Tran_Amnt_No_Offer'] = User_Data['Tot_Tran_Amnt']- User_Data['Offer_Trans_Amnt']
# The number of transaction without any offers
User_Data['Trans_Cnt_No_Offer'] = User_Data['Tot_Tran_Cnt']- User_Data['Offer Completed']
# Converting to integers
User_Data[['BOGO_offer','Disc_offer','Info_offer']] = User_Data[['BOGO_offer','Disc_offer','Info_offer']].astype(int)

Metric Average of Offer Type Combinations plots

In the following tables and plots, (BOGO Offer, Discount Offer, Informational Offer) shows that what offer has been used. For example, (1,1,0) means people who respond to BOGO Offers and Discount Offers but not Informational Offers.

In [16]:
def Mean_by_Offer_Type (Columns):    
    j = Columns[0]
    Group = User_Data.groupby(['BOGO_offer','Disc_offer','Info_offer'])[j].mean().reset_index()
    for j in Columns[1:]:
        Temp = User_Data.groupby(['BOGO_offer','Disc_offer','Info_offer'])[j].mean().reset_index()
        Group = Group.merge(Temp, on =['BOGO_offer','Disc_offer','Info_offer'])
        del Temp
    return Group
In [17]:
Column_List = ['Offer_Tran_Cnt_Ratio', 'Offer_Trans_Amnt_Ratio', 'Offer_Comp_View_Ratio', 'Offer_Comp_Rec_Ratio']

Group = Mean_by_Offer_Type (Column_List).set_index(['BOGO_offer','Disc_offer','Info_offer'])
display(Group.round(4))

Colors =  ['GreenYellow', 'Bisque','LightSalmon','Plum', 'LightSkyBlue']
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(16, 7), sharex=False)
_ = Group.plot.bar(ax = ax, rot = 0, edgecolor = 'Navy', color = Colors, hatch = '//')
_ = ax.set_title('Metric Average of Offer Type Combinations')
_ = ax.legend(('Offer Transactions Count Ratio', 'Offer Transactions Amount Ratio',
            'Offer Completed View Ratio', 'Offer Completed Receive Ratio'), loc='upper left', fontsize = 12)
_ = ax.set_xlabel('(BOGO Offer, Discount Offer, Informational Offer)')
_ = ax.set_ylim([0,1])
Offer_Tran_Cnt_Ratio Offer_Trans_Amnt_Ratio Offer_Comp_View_Ratio Offer_Comp_Rec_Ratio
BOGO_offer Disc_offer Info_offer
0 0 0 0.0000 0.0000 0.0000 0.0000
1 0.2110 0.3135 0.2807 0.2892
1 0 0.2383 0.4033 0.5249 0.3604
1 0.3114 0.5239 0.5364 0.5888
1 0 0 0.2728 0.3540 0.5516 0.3782
1 0.3542 0.5111 0.5501 0.6056
1 0 0.3840 0.4542 0.7589 0.6470
1 0.4140 0.5511 0.6838 0.7924
In [18]:
Column_List = ['Offer Completed', 'Tot_Tran_Cnt', 'Trans_Cnt_No_Offer', 'Tot_Rewards_Rec', 'Reward_per_Offer','Difficulty_per_Offer']
Group = Mean_by_Offer_Type (Column_List).set_index(['BOGO_offer','Disc_offer','Info_offer'])
display(Group.round(4))

Colors =  ['Blue', 'Orange', 'DarkGreen','Purple','LightPink', 'Crimson']

fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(16, 7), sharex=False)
_ = Group.plot.bar(ax = ax, rot = 0, edgecolor = 'Navy', color = Colors)
_ = ax.set_title('Metric Average of Offer Type Combinations')
_ = ax.legend(('Offer Completed', 'Total Transaction Count',
                'Total Transaction Count (without any offers)', 'Total Rewards Received', 'Reward per Offer',
               'Difficulty per Offer'), loc='upper left', fontsize = 12)
_ = ax.set_xlabel('(BOGO Offer, Discount Offer, Informational Offer)')
_ = ax.set_ylim([0,20])
_ = ax.set_yticks(np.arange(0, 21, 2))
Offer Completed Tot_Tran_Cnt Trans_Cnt_No_Offer Tot_Rewards_Rec Reward_per_Offer Difficulty_per_Offer
BOGO_offer Disc_offer Info_offer
0 0 0 0.0000 4.4779 4.4779 1.5248 0.0000 0.0000
1 1.1944 7.5111 6.3168 1.6380 1.4593 0.0000
1 0 1.4905 9.0246 7.5341 7.1875 5.1920 9.5161
1 2.6759 10.8667 8.1909 6.6585 2.5487 5.2188
1 0 0 1.5340 7.7338 6.1998 15.7631 10.7039 7.2817
1 2.7579 9.5486 6.7907 14.9018 5.4684 3.9611
1 0 2.9435 9.8640 6.9205 18.6081 6.5097 8.8200
1 3.9506 11.2950 7.3444 16.6150 4.2396 6.2400
In [19]:
Column_List = [ 'Tot_Tran_Amnt', 'Offer_Trans_Amnt', 'Tran_Amnt_per_Offer', 'Ave_Tran_Amnt', 'Tran_Amnt_No_Offer']
Group = Mean_by_Offer_Type (Column_List).set_index(['BOGO_offer','Disc_offer','Info_offer'])
display(Group.round(4))

fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(16, 7), sharex=False)
_ = Group.plot.bar(ax = ax, rot = 0, edgecolor = 'k',
    color = sns.set_palette(["#9b59b6", "#3498db", "#e74c3c", "#34495e","#2ecc71","#95a5a6"]))
_ = ax.set_title('Metric Average of Offer Type Combinations')
_ = ax.legend(('Total Transaction Amount', 'Transaction Amount (with Offers)',
                'Transaction Amount per Offer', 'Average Transaction Amount',
               'Transaction Amount (without any Offers)'), loc='upper left', fontsize = 12)
_ = ax.set_xlabel('(BOGO Offer, Discount Offer, Informational Offer)')
_ = ax.set_ylim([0,200])
_ = ax.set_yticks(np.arange(0, 201, 20))
Tot_Tran_Amnt Offer_Trans_Amnt Tran_Amnt_per_Offer Ave_Tran_Amnt Tran_Amnt_No_Offer
BOGO_offer Disc_offer Info_offer
0 0 0 26.2267 0.0000 0.0000 6.1674 26.2267
1 46.7299 14.4213 11.4998 7.2478 32.3086
1 0 95.1184 29.5337 19.3329 13.3275 65.5847
1 115.7472 52.0649 18.9348 12.5913 63.6824
1 0 0 120.0015 32.7871 21.4729 17.6009 87.2144
1 146.2316 66.4001 23.5046 16.7318 79.8316
1 0 175.0637 66.7588 22.2873 19.7209 108.3049
1 187.4890 93.1559 23.3329 17.9300 94.3331
  • We can see from the above plot that the customers who have taken advantage of all three offer types not only spend the most with offers but also have the highest total transaction amount. In other words, the higher amount of incentivization is, the more is spent.

  • The BOGO offer is the leading offer in terms of having a higher total transaction amount than discount and informational offers. Besides, customers tend to spend more than half of the total transaction amount without offers while using BOGO and discount offers.

We would like to know how the overall offer completion rate, transaction amount ratio motivated by an offer, and reward per offer affected by the user demographics.

Offer Completion, Offer Transaction Amount and Reward Per Offer by Gender

In [20]:
fig, ax = plt.subplots(nrows=3, ncols=3, figsize=(13, 13), sharex=False)

# Average Offer Completion RatioBy Membership Tenure
# [0,0]
female_avg = User_Data[User_Data['Gender_Female']==1].groupby('Member_Tenure').mean()['Offer_Comp_Rec_Ratio']
male_avg = User_Data[User_Data['Gender_Male']==1].groupby('Member_Tenure').mean()['Offer_Comp_Rec_Ratio']

_ = ax[0,0].scatter(female_avg.index, female_avg, label='Female', color = 'tomato', edgecolor = 'darkred')
_ = ax[0,0].scatter(male_avg.index, male_avg, label='Male', color = 'steelblue', edgecolor = 'navy')
_ = ax[0,0].set_xlim(0,70)
_ = ax[0,0].set_ylim(0,1)
_ = ax[0,0].set_xlabel('Member Tenure')
_ = ax[0,0].set_title('Average Offer Completion Ratio\nBy Membership Tenure')
_ = ax[0,0].legend(loc='lower right')

# Average Offer Transaction Amount Ratio By Membership Tenure
# [0,1]
female_avg = User_Data[User_Data['Gender_Female']==1].groupby('Member_Tenure')['Offer_Trans_Amnt_Ratio'].mean()
male_avg = User_Data[User_Data['Gender_Male']==1].groupby('Member_Tenure')['Offer_Trans_Amnt_Ratio'].mean()

_ = ax[0,1].scatter(female_avg.index, female_avg, label='Female', color = 'tomato', edgecolor = 'darkred')
_ = ax[0,1].scatter(male_avg.index, male_avg, label='Male', color = 'steelblue', edgecolor = 'navy')
_ = ax[0,1].set_xlim(0,70)
_ = ax[0,1].set_ylim(0,1)
_ = ax[0,1].set_xlabel('Member Tenure')
_ = ax[0,1].set_title('Average Offer Transaction Amount Ratio\nBy Membership Tenure')
_ = ax[0,1].legend(loc='lower right')

# Average Reward Per Offer By Membership Tenure
# [0,2]
female_avg = User_Data[User_Data['Gender_Female']==1].groupby('Member_Tenure')['Reward_per_Offer'].mean()
male_avg = User_Data[User_Data['Gender_Male']==1].groupby('Member_Tenure')['Reward_per_Offer'].mean()

_ = ax[0,2].scatter(female_avg.index, female_avg, label='Female', color = 'tomato', edgecolor = 'darkred')
_ = ax[0,2].scatter(male_avg.index, male_avg, label='Male', color = 'steelblue', edgecolor = 'navy')
_ = ax[0,2].set_xlim(0,70)
_ = ax[0,2].set_ylim(0,8)
_ = ax[0,2].set_xlabel('Member Tenure')
_ = ax[0,2].set_title('Average Reward Per Offer\nBy Membership Tenure')
_ = ax[0,2].legend(loc='lower left')

# Average Offer Completion Ratio By Income
# [1,0]
female_avg = User_Data[User_Data['Gender_Female']==1].groupby('Income').mean()['Offer_Comp_Rec_Ratio']
male_avg = User_Data[User_Data['Gender_Male']==1].groupby('Income').mean()['Offer_Comp_Rec_Ratio']

_ = ax[1,0].scatter(female_avg.index, female_avg, label='Female', color = 'tomato', edgecolor = 'darkred')
_ = ax[1,0].scatter(male_avg.index, male_avg, label='Male', color = 'steelblue', edgecolor = 'navy')
_ = ax[1,0].set_xlim(0,130000)
_ = ax[1,0].set_ylim(0,0.7)
_ = ax[1,0].set_xlabel('Income')
_ = ax[1,0].set_title('Average Offer Completion Ratio\nBy Income')
_ = ax[1,0].legend(loc='lower left')

# Average Offer Transaction Amount Ratio By Income
# [1,1]
female_avg = User_Data[User_Data['Gender_Female']==1].groupby('Income')['Offer_Trans_Amnt_Ratio'].mean()
male_avg = User_Data[User_Data['Gender_Male']==1].groupby('Income')['Offer_Trans_Amnt_Ratio'].mean()

_ = ax[1,1].scatter(female_avg.index, female_avg, label='Female', color = 'tomato', edgecolor = 'darkred')
_ = ax[1,1].scatter(male_avg.index, male_avg, label='Male', color = 'steelblue', edgecolor = 'navy')
_ = ax[1,1].set_xlim(0,130000)
_ = ax[1,1].set_ylim(0,0.5)
_ = ax[1,1].set_xlabel('Income')
_ = ax[1,1].set_title('Average Offer Transaction Amount Ratio\nBy Income')
_ = ax[1,1].legend(loc='lower left')

# Average Reward Per Offer By Income
# [1,2]
female_avg = User_Data[User_Data['Gender_Female']==1].groupby('Income')['Reward_per_Offer'].mean()
male_avg = User_Data[User_Data['Gender_Male']==1].groupby('Income')['Reward_per_Offer'].mean()

_ = ax[1,2].scatter(female_avg.index, female_avg, label='Female', color = 'tomato', edgecolor = 'darkred')
_ = ax[1,2].scatter(male_avg.index, male_avg, label='Male', color = 'steelblue', edgecolor = 'navy')
_ = ax[1,2].set_xlim(0,130000)
_ = ax[1,2].set_ylim(0,12)
_ = ax[1,2].set_xlabel('Income')
_ = ax[1,2].set_title('Average Reward Per Offer By Income')
_ = ax[1,2].legend(loc='lower left')

# Average Offer Completion Ratio By Age
# [2,0]
female_avg = User_Data[User_Data['Gender_Female']==1].groupby('Age').mean()['Offer_Comp_Rec_Ratio']
male_avg = User_Data[User_Data['Gender_Male']==1].groupby('Age').mean()['Offer_Comp_Rec_Ratio']

_ = ax[2,0].scatter(female_avg.index, female_avg, label='Female', color = 'tomato', edgecolor = 'darkred')
_ = ax[2,0].scatter(male_avg.index, male_avg, label='Male', color = 'steelblue', edgecolor = 'navy')
_ = ax[2,0].set_xlim(0,120)
_ = ax[2,0].set_ylim(0,0.8)
_ = ax[2,0].set_xlabel('Income')
_ = ax[2,0].set_title('Average Offer Completion Ratio By Age')
_ = ax[2,0].legend(loc='upper left')

# Average Offer Transaction Amount Ratio By Age
# [2,1]
female_avg = User_Data[User_Data['Gender_Female']==1].groupby('Age')['Offer_Trans_Amnt_Ratio'].mean()
male_avg = User_Data[User_Data['Gender_Male']==1].groupby('Age')['Offer_Trans_Amnt_Ratio'].mean()

_ = ax[2,1].scatter(female_avg.index, female_avg, label='Female', color = 'tomato', edgecolor = 'darkred')
_ = ax[2,1].scatter(male_avg.index, male_avg, label='Male', color = 'steelblue', edgecolor = 'navy')
_ = ax[2,1].set_xlim(0,120)
_ = ax[2,1].set_ylim(0,1)
_ = ax[2,1].set_xlabel('Income')
_ = ax[2,1].set_title('Average Offer Transaction Amount Ratio\nBy Age')
_ = ax[2,1].legend(loc='upper left')

# Average Reward Per Offer By Age
# [2,2]
female_avg = User_Data[User_Data['Gender_Female']==1].groupby('Age')['Reward_per_Offer'].mean()
male_avg = User_Data[User_Data['Gender_Male']==1].groupby('Age')['Reward_per_Offer'].mean()
_ = ax[2,2].scatter(female_avg.index, female_avg, label='Female', color = 'tomato', edgecolor = 'darkred')
_ = ax[2,2].scatter(male_avg.index, male_avg, label='Male', color = 'steelblue', edgecolor = 'navy')
_ = ax[2,2].set_xlim(0,120)
_ = ax[2,2].set_ylim(0,16)
_ = ax[2,2].set_xlabel('Income')
_ = ax[2,2].set_title('Average Reward Per Offer By Age')
_ = ax[2,2].legend(loc='upper left')

_ = plt.tight_layout()

Offer Conversion Rate

In [21]:
User_Data['BOGO_Conv_Rate'] = User_Data['BOGO_comp']/User_Data['BOGO_Offer_Rec']
User_Data['Disc_Conv_Rate'] = User_Data['Disc_comp']/User_Data['Disc_Offer_Rec']
User_Data['Info_Conv_Rate'] = User_Data['Info_comp']/User_Data['Info_Offer_Rec']

Temp = User_Data[['BOGO_Conv_Rate', 'Disc_Conv_Rate','Info_Conv_Rate']].agg({'mean'}).T
display(Temp.T)
BOGO_Conv_Rate Disc_Conv_Rate Info_Conv_Rate
mean 0.36438 0.414618 0.390084
In [22]:
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(14, 7), sharex=False)
_ = Temp.plot.bar(ax = ax[0], legend = False, color= 'mediumorchid', edgecolor='indigo', hatch = '///', fontsize = 14,rot=0)
_ = ax[0].set_xticks(np.arange(3))
_ = ax[0].set_xticklabels(['BOGO Offer\nConversion Rate', 'Information Offer\nConversion Rate',
                            'Discount Offer\nConversion Rate'])
_ = ax[0].set_title('Ave Conversion Rate by Offer', fontsize = 14)
_ = ax[0].set_xlabel('Average', fontsize = 14)
_ = ax[0].set_ylim([0, .5])
_ = ax[0].set_yticks(np.arange(0, .55, 0.05))

_ = Temp.plot.pie(ax = ax[1], y= 'mean', startangle=90, label = '', labels = None,
                  colors = sns.set_palette(['lightskyblue','limegreen','lightsalmon']),
                  legend=True, autopct='%1.1f%%', fontsize=14,
                  pctdistance=0.85, explode = (0.05,0.05,0.05))
_ = ax[1].legend(bbox_to_anchor=(.25, 1), labels= ['BOGO Offer Conversion Rate', 'Information Offer Conversion Rate',
                                                'Discount Offer Conversion Rate'], fontsize = 12)
_ = ax[1].add_artist(plt.Circle((0,0),0.70,fc='white'))
In [23]:
fig, ax = plt.subplots(nrows=3, ncols=3, figsize=(13, 13), sharex=False)

# Average BOGO Offer Conversion Rate by Income
# [0,0]

FeMale_avg = User_Data[User_Data['Gender_Female']==1].groupby('Income')['BOGO_Conv_Rate'].mean()
Male_avg = User_Data[User_Data['Gender_Male']==1].groupby('Income')['BOGO_Conv_Rate'].mean()

_ = ax[0,0].scatter(FeMale_avg.index, FeMale_avg, label='Female', color = 'tomato', edgecolor = 'darkred')
_ = ax[0,0].scatter(Male_avg.index, Male_avg, label='Male', color = 'steelblue', edgecolor = 'navy')
_ = ax[0,0].set_xlim([0,130000])
_ = ax[0,0].set_ylim([0,1])
_ = ax[0,0].set_xlabel('Income')
_ = ax[0,0].set_title('Average BOGO Offer Conversion Rate\nby Income')
_ = ax[0,0].legend(loc='upper left')

# Average Discount Offer Conversion Rate by Income
# [0,1]

FeMale_avg = User_Data[User_Data['Gender_Female']==1].groupby('Income')['Disc_Conv_Rate'].mean()
Male_avg = User_Data[User_Data['Gender_Male']==1].groupby('Income')['Disc_Conv_Rate'].mean()

_ = ax[0,1].scatter(FeMale_avg.index, FeMale_avg, label='Female', color = 'tomato', edgecolor = 'darkred')
_ = ax[0,1].scatter(Male_avg.index, Male_avg, label='Male', color = 'steelblue', edgecolor = 'navy')
_ = ax[0,1].set_xlim([0,130000])
_ = ax[0,1].set_ylim([0,1])
_ = ax[0,1].set_xlabel('Income')
_ = ax[0,1].set_title('Average Discount Offer Conversion Rate\nby Income')
_ = ax[0,1].legend(loc='upper left')

# Average Information Offer Conversion Rate by Income
# [0,2]

FeMale_avg = User_Data[User_Data['Gender_Female']==1].groupby('Income')['Info_Conv_Rate'].mean()
Male_avg = User_Data[User_Data['Gender_Male']==1].groupby('Income')['Info_Conv_Rate'].mean()

_ = ax[0,2].scatter(FeMale_avg.index, FeMale_avg, label='Female', color = 'tomato', edgecolor = 'darkred')
_ = ax[0,2].scatter(Male_avg.index, Male_avg, label='Male', color = 'steelblue', edgecolor = 'navy')
_ = ax[0,2].set_xlim([0,130000])
_ = ax[0,2].set_ylim([0,1])
_ = ax[0,2].set_xlabel('Income')
_ = ax[0,2].set_title('Average Information Offer Conversion Rate\nby Income')
_ = ax[0,2].legend(loc='upper left')

# Average BOGO Offer Conversion Rate by Tenure
# [1,0]

FeMale_avg = User_Data[User_Data['Gender_Female']==1].groupby('Member_Tenure')['BOGO_Conv_Rate'].mean()
Male_avg = User_Data[User_Data['Gender_Male']==1].groupby('Member_Tenure')['BOGO_Conv_Rate'].mean()

_ = ax[1,0].scatter(FeMale_avg.index, FeMale_avg, label='Female', color = 'tomato', edgecolor = 'darkred')
_ = ax[1,0].scatter(Male_avg.index, Male_avg, label='Male', color = 'steelblue', edgecolor = 'navy')
_ = ax[1,0].set_xlim([0,70])
_ = ax[1,0].set_ylim([0,1])
_ = ax[1,0].set_xlabel('Tenure')
_ = ax[1,0].set_title('Average BOGO Offer Conversion Rate\nby Tenure')
_ = ax[1,0].legend(loc='upper left')

# # Average Discount Offer Conversion Rate by Tenure
# [1,1]

FeMale_avg = User_Data[User_Data['Gender_Female']==1].groupby('Member_Tenure')['Disc_Conv_Rate'].mean()
Male_avg = User_Data[User_Data['Gender_Male']==1].groupby('Member_Tenure')['Disc_Conv_Rate'].mean()

_ = ax[1,1].scatter(FeMale_avg.index, FeMale_avg, label='Female', color = 'tomato', edgecolor = 'darkred')
_ = ax[1,1].scatter(Male_avg.index, Male_avg, label='Male', color = 'steelblue', edgecolor = 'navy')
_ = ax[1,1].set_xlim(0,70)
_ = ax[1,1].set_ylim(0,1)
_ = ax[1,1].set_xlabel('Tenure')
_ = ax[1,1].set_title('Average Discount Offer Conversion Rate\nby Tenure')
_ = ax[1,1].legend(loc='upper left')

# Average Information Offer Conversion Rate by Tenure
# [1,2]

FeMale_avg = User_Data[User_Data['Gender_Female']==1].groupby('Member_Tenure')['Info_Conv_Rate'].mean()
Male_avg = User_Data[User_Data['Gender_Male']==1].groupby('Member_Tenure')['Info_Conv_Rate'].mean()

_ = ax[1,2].scatter(FeMale_avg.index, FeMale_avg, label='Female', color = 'tomato', edgecolor = 'darkred')
_ = ax[1,2].scatter(Male_avg.index, Male_avg, label='Male', color = 'steelblue', edgecolor = 'navy')
_ = ax[1,2].set_xlim([0,70])
_ = ax[1,2].set_ylim([0,1])
_ = ax[1,2].set_xlabel('Tenure')
_ = ax[1,2].set_title('Average Information Offer Conversion Rate\nby Tenure')
_ = ax[1,2].legend(loc='upper left')

# Average BOGO Offer Conversion Rate by Age
# [2,0]

FeMale_avg = User_Data[User_Data['Gender_Female']==1].groupby('Age')['BOGO_Conv_Rate'].mean()
Male_avg = User_Data[User_Data['Gender_Male']==1].groupby('Age')['BOGO_Conv_Rate'].mean()
_ = ax[2,0].scatter(FeMale_avg.index, FeMale_avg, label='Female', color = 'tomato', edgecolor = 'darkred')
_ = ax[2,0].scatter(Male_avg.index, Male_avg, label='Male', color = 'steelblue', edgecolor = 'navy')
_ = ax[2,0].set_xlim([0,100])
_ = ax[2,0].set_ylim([0,1])
_ = ax[2,0].set_xlabel('Age')
_ = ax[2,0].set_title('Average BOGO Offer Conversion Rate\nby Age')
_ = ax[2,0].legend(loc='upper left')

# Average Discount Offer Conversion Rate by Age
# [2,1]

FeMale_avg = User_Data[User_Data['Gender_Female']==1].groupby('Age')['Disc_Conv_Rate'].mean()
Male_avg = User_Data[User_Data['Gender_Male']==1].groupby('Age')['Disc_Conv_Rate'].mean()

_ = ax[2,1].scatter(FeMale_avg.index, FeMale_avg, label='Female', color = 'tomato', edgecolor = 'darkred')
_ = ax[2,1].scatter(Male_avg.index, Male_avg, label='Male', color = 'steelblue', edgecolor = 'navy')
_ = ax[2,1].set_xlim([0,100])
_ = ax[2,1].set_ylim([0,1])
_ = ax[2,1].set_xlabel('Age')
_ = ax[2,1].set_title('Average Discount Offer Conversion Rate\nby Age')
_ = ax[2,1].legend(loc='upper left')

# Average Information Offer Conversion Rate by Age
# [2,2]

FeMale_avg = User_Data[User_Data['Gender_Female']==1].groupby('Age')['Info_Conv_Rate'].mean()
Male_avg = User_Data[User_Data['Gender_Male']==1].groupby('Age')['Info_Conv_Rate'].mean()

_ = ax[2,2].scatter(FeMale_avg.index, FeMale_avg, label='Female', color = 'tomato', edgecolor = 'darkred')
_ = ax[2,2].scatter(Male_avg.index, Male_avg, label='Male', color = 'steelblue', edgecolor = 'navy')
_ = ax[2,2].set_xlim([0,100])
_ = ax[2,2].set_ylim([0,1])
_ = ax[2,2].set_xlabel('Age')
_ = ax[2,2].set_title('Average Information Offer Conversion Rate\nby Age')
_ = ax[2,2].legend(loc='upper left')

_ = plt.tight_layout()